# Basic imports
from Allohubpy import SAtraj
from Allohubpy import Overlap
from Allohubpy import SANetwork
from Allohubpy import TrajProcessor
from Allohubpy.plotter import Allohub_plots
import numpy as np
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests
In this example the trajectories will be loaded first and encoded into two different alphabets (M32K25 and 3DI). Removing the water molecules and any other unnecessary atoms beforehand will speed up the encoding.
The Structural Alphabet handler is initialized and the data is loaded. The package comes by default with the M32K25 alphabet as well as the 3DI one, but other alphabets can be provided as a list of possible tokens.
We will use skip=100 to skip the first 100 frames of each trajectory (corresponding to 1 ns) as equilibration. The argument stride=10 will extract every 10th frame, producing a spacing of 100 ps between frames.
The first step is to convert the MD trajectories to the structural alphabet of choice
# Other alphabets can be easily added to the analysis using the AbsEncoder class.
# AbsEncoder takes a list of the possible tokens that make up the alphabet.
# Encoder for the 3DI structural alphabet; the constructor argument is the file the
# encoded trajectory is written to (it can be redirected later with set_output_file).
enc_3di = TrajProcessor.Encoder3DI("encoded_NtrC_inactive_repl1_3di.sa")
# Encoder for the M32K25 structural alphabet.
enc_mk = TrajProcessor.SAEncoder("encoded_NtrC_inactive_repl1_mk.sa")
# The trajectory is saved every 10 ps. With stride=10 only every 10th frame is
# processed, producing a spacing of 100 ps between frames.
# We skip the first 100 frames (-> 1 ns) as extra equilibration.
# NOTE(fix): the condition-1 output files were previously named
# "encoded_NtrC_replN_*.sa" (no state tag); they now include "inactive" so the
# names are consistent with the active condition and with the files loaded later
# ("encoded_NtrC_inactive_replN_mk.sa").
for state in ("inactive", "active"):
    topology = f"data_NtrC/NtrC_{state}_nowat.pdb"
    for repl in (1, 2, 3):
        # Redirect both encoders to the output files of this condition/replicate.
        enc_3di.set_output_file(f"encoded_NtrC_{state}_repl{repl}_3di.sa")
        enc_mk.set_output_file(f"encoded_NtrC_{state}_repl{repl}_mk.sa")
        mdtraj_file = f"data_NtrC/NtrC_{state}_repl{repl}.xtc"
        enc_3di.load_trajectory(topology=topology, mdtraj=mdtraj_file, skip=100, stride=10)
        enc_3di.encode()
        enc_mk.load_trajectory(topology=topology, mdtraj=mdtraj_file, skip=100, stride=10)
        enc_mk.encode()
        print(f"Encoded NtrC {state} repl{repl}")
Encoded NtrC inactive repl1 Encoded NtrC inactive repl2 Encoded NtrC inactive repl3 Encoded NtrC active repl1 Encoded NtrC active repl2 Encoded NtrC active repl3
To illustrate the improvements of the methodology, different block sizes will be used for the encoding of the trajectories.
# Initialize the Structural Alphabet trajectory handlers.
print("Initialize Structural Alphabet trajectory handler")
# Set seeds for reproducibility
seed = 42  # Replace with any integer
import random
np.random.seed(seed)
random.seed(seed)


def _new_handler(block_size):
    """Create an SAtraj handler for the M32K25 alphabet with the given block size."""
    return SAtraj.SAtraj(block_size=block_size, alphabet=SAtraj.ALPHABETS["M32K25"])


# Handlers whose frames will be randomized when the data is loaded.
sa_ntrc_active_100_1 = _new_handler(100)
sa_ntrc_active_100_2 = _new_handler(100)
sa_ntrc_active_100_3 = _new_handler(100)
sa_ntrc_inactive_100_1 = _new_handler(100)
sa_ntrc_inactive_100_2 = _new_handler(100)
sa_ntrc_inactive_100_3 = _new_handler(100)
sa_ntrc_active_10_1 = _new_handler(10)
sa_ntrc_active_10_2 = _new_handler(10)
sa_ntrc_active_10_3 = _new_handler(10)
sa_ntrc_inactive_10_1 = _new_handler(10)
sa_ntrc_inactive_10_2 = _new_handler(10)
sa_ntrc_inactive_10_3 = _new_handler(10)
sa_ntrc_active_50_1 = _new_handler(50)
sa_ntrc_active_50_2 = _new_handler(50)
sa_ntrc_active_50_3 = _new_handler(50)
sa_ntrc_inactive_50_1 = _new_handler(50)
sa_ntrc_inactive_50_2 = _new_handler(50)
sa_ntrc_inactive_50_3 = _new_handler(50)
# Handlers that keep the original frame order (no randomization).
sa_ntrc_active_100_1_norand = _new_handler(100)
sa_ntrc_active_100_2_norand = _new_handler(100)
sa_ntrc_active_100_3_norand = _new_handler(100)
sa_ntrc_inactive_100_1_norand = _new_handler(100)
sa_ntrc_inactive_100_2_norand = _new_handler(100)
sa_ntrc_inactive_100_3_norand = _new_handler(100)
sa_ntrc_active_10_1_norand = _new_handler(10)
sa_ntrc_active_10_2_norand = _new_handler(10)
sa_ntrc_active_10_3_norand = _new_handler(10)
sa_ntrc_inactive_10_1_norand = _new_handler(10)
sa_ntrc_inactive_10_2_norand = _new_handler(10)
sa_ntrc_inactive_10_3_norand = _new_handler(10)
sa_ntrc_active_50_1_norand = _new_handler(50)
sa_ntrc_active_50_2_norand = _new_handler(50)
sa_ntrc_active_50_3_norand = _new_handler(50)
sa_ntrc_inactive_50_1_norand = _new_handler(50)
sa_ntrc_inactive_50_2_norand = _new_handler(50)
sa_ntrc_inactive_50_3_norand = _new_handler(50)
# Load the encoded trajectories into the handlers.
print("Load encoded data into the model")


def _encoded_path(state, repl):
    """Path of the M32K25-encoded trajectory for a given condition and replicate."""
    return f"data_NtrC/encoded_NtrC_{state}_repl{repl}_mk.sa"


# Handlers that shuffle the frame order on loading (default randomize).
# The load order is kept identical to the original script so RNG consumption
# (and therefore the shuffles) is reproducible.
for handler, state, repl in (
    (sa_ntrc_active_100_1, "active", 1),
    (sa_ntrc_active_100_2, "active", 2),
    (sa_ntrc_active_100_3, "active", 3),
    (sa_ntrc_inactive_100_1, "inactive", 1),
    (sa_ntrc_inactive_100_2, "inactive", 2),
    (sa_ntrc_inactive_100_3, "inactive", 3),
    (sa_ntrc_active_10_1, "active", 1),
    (sa_ntrc_active_10_2, "active", 2),
    (sa_ntrc_active_10_3, "active", 3),
    (sa_ntrc_inactive_10_1, "inactive", 1),
    (sa_ntrc_inactive_10_2, "inactive", 2),
    (sa_ntrc_inactive_10_3, "inactive", 3),
    (sa_ntrc_active_50_1, "active", 1),
    (sa_ntrc_active_50_2, "active", 2),
    (sa_ntrc_active_50_3, "active", 3),
    (sa_ntrc_inactive_50_1, "inactive", 1),
    (sa_ntrc_inactive_50_2, "inactive", 2),
    (sa_ntrc_inactive_50_3, "inactive", 3),
):
    handler.load_data(_encoded_path(state, repl))

# Handlers that keep the original frame order.
for handler, state, repl in (
    (sa_ntrc_active_100_1_norand, "active", 1),
    (sa_ntrc_active_100_2_norand, "active", 2),
    (sa_ntrc_active_100_3_norand, "active", 3),
    (sa_ntrc_inactive_100_1_norand, "inactive", 1),
    (sa_ntrc_inactive_100_2_norand, "inactive", 2),
    (sa_ntrc_inactive_100_3_norand, "inactive", 3),
    (sa_ntrc_active_10_1_norand, "active", 1),
    (sa_ntrc_active_10_2_norand, "active", 2),
    (sa_ntrc_active_10_3_norand, "active", 3),
    (sa_ntrc_inactive_10_1_norand, "inactive", 1),
    (sa_ntrc_inactive_10_2_norand, "inactive", 2),
    (sa_ntrc_inactive_10_3_norand, "inactive", 3),
    (sa_ntrc_active_50_1_norand, "active", 1),
    (sa_ntrc_active_50_2_norand, "active", 2),
    (sa_ntrc_active_50_3_norand, "active", 3),
    (sa_ntrc_inactive_50_1_norand, "inactive", 1),
    (sa_ntrc_inactive_50_2_norand, "inactive", 2),
    (sa_ntrc_inactive_50_3_norand, "inactive", 3),
):
    handler.load_data(_encoded_path(state, repl), randomize=False)
Initialize Structural Alphabet trajectory handler Load encoded data into the model
One can examine the encoded structure string as well as all other analyses using the provided plotting functions.
Alternatively, one can adapt the provided plotting functions for other applications. They are all located in the file Allohub_plots.py.
To display the plots the argument action="show" should be used, while for saving to a file it should be action="save". If the save option is provided, the name of the file can be specified with name="my_name.png". The format of the image will depend on the extension specified in the file name.
# Plot the non-randomized (time-ordered) Structural Alphabet trajectory of NtrC active repl1
Allohub_plots.plot_SA_traj(sa_ntrc_active_100_1_norand.get_int_traj(), SAtraj.ALPHABETS["M32K25"], action="show")
# Plot the non-randomized (time-ordered) Structural Alphabet trajectory of NtrC inactive repl1
Allohub_plots.plot_SA_traj(sa_ntrc_inactive_100_1_norand.get_int_traj(), SAtraj.ALPHABETS["M32K25"], action="show")
Now we will proceed to compute the mutual information for each block. With frames spaced 100 ps apart, trajectories with block size 100 contain 9 blocks of 10 ns each, trajectories with block size 50 contain 19 blocks of 5 ns each, and trajectories with block size 10 contain 99 blocks of 1 ns each.
print("Calculate the MI information")
# One can specify the number of workers to parallelize the process. max_workers=None would use all available resources.


def _compute_mi(handler, label):
    """Compute the per-block mutual information for *handler* and report completion."""
    mis = handler.compute_mis(max_workers=7)
    print(f"{label} finished")
    return mis


mi_ntrc_inactive_100_1_norand = _compute_mi(sa_ntrc_inactive_100_1_norand, "NtrC inactive, no randomization, block size 100, repl 1")
mi_ntrc_inactive_100_2_norand = _compute_mi(sa_ntrc_inactive_100_2_norand, "NtrC inactive, no randomization, block size 100, repl 2")
mi_ntrc_inactive_100_3_norand = _compute_mi(sa_ntrc_inactive_100_3_norand, "NtrC inactive, no randomization, block size 100, repl 3")
mi_ntrc_inactive_50_1_norand = _compute_mi(sa_ntrc_inactive_50_1_norand, "NtrC inactive, no randomization, block size 50, repl 1")
mi_ntrc_inactive_50_2_norand = _compute_mi(sa_ntrc_inactive_50_2_norand, "NtrC inactive, no randomization, block size 50, repl 2")
mi_ntrc_inactive_50_3_norand = _compute_mi(sa_ntrc_inactive_50_3_norand, "NtrC inactive, no randomization, block size 50, repl 3")
mi_ntrc_inactive_10_1_norand = _compute_mi(sa_ntrc_inactive_10_1_norand, "NtrC inactive, no randomization, block size 10, repl 1")
mi_ntrc_inactive_10_2_norand = _compute_mi(sa_ntrc_inactive_10_2_norand, "NtrC inactive, no randomization, block size 10, repl 2")
mi_ntrc_inactive_10_3_norand = _compute_mi(sa_ntrc_inactive_10_3_norand, "NtrC inactive, no randomization, block size 10, repl 3")
mi_ntrc_active_100_1_norand = _compute_mi(sa_ntrc_active_100_1_norand, "NtrC active, no randomization, block size 100, repl 1")
mi_ntrc_active_100_2_norand = _compute_mi(sa_ntrc_active_100_2_norand, "NtrC active, no randomization, block size 100, repl 2")
mi_ntrc_active_100_3_norand = _compute_mi(sa_ntrc_active_100_3_norand, "NtrC active, no randomization, block size 100, repl 3")
mi_ntrc_active_50_1_norand = _compute_mi(sa_ntrc_active_50_1_norand, "NtrC active, no randomization, block size 50, repl 1")
mi_ntrc_active_50_2_norand = _compute_mi(sa_ntrc_active_50_2_norand, "NtrC active, no randomization, block size 50, repl 2")
mi_ntrc_active_50_3_norand = _compute_mi(sa_ntrc_active_50_3_norand, "NtrC active, no randomization, block size 50, repl 3")
mi_ntrc_active_10_1_norand = _compute_mi(sa_ntrc_active_10_1_norand, "NtrC active, no randomization, block size 10, repl 1")
mi_ntrc_active_10_2_norand = _compute_mi(sa_ntrc_active_10_2_norand, "NtrC active, no randomization, block size 10, repl 2")
mi_ntrc_active_10_3_norand = _compute_mi(sa_ntrc_active_10_3_norand, "NtrC active, no randomization, block size 10, repl 3")
mi_ntrc_inactive_100_1 = _compute_mi(sa_ntrc_inactive_100_1, "NtrC inactive, randomization, block size 100, repl 1")
mi_ntrc_inactive_100_2 = _compute_mi(sa_ntrc_inactive_100_2, "NtrC inactive, randomization, block size 100, repl 2")
mi_ntrc_inactive_100_3 = _compute_mi(sa_ntrc_inactive_100_3, "NtrC inactive, randomization, block size 100, repl 3")
mi_ntrc_inactive_50_1 = _compute_mi(sa_ntrc_inactive_50_1, "NtrC inactive, randomization, block size 50, repl 1")
mi_ntrc_inactive_50_2 = _compute_mi(sa_ntrc_inactive_50_2, "NtrC inactive, randomization, block size 50, repl 2")
mi_ntrc_inactive_50_3 = _compute_mi(sa_ntrc_inactive_50_3, "NtrC inactive, randomization, block size 50, repl 3")
mi_ntrc_inactive_10_1 = _compute_mi(sa_ntrc_inactive_10_1, "NtrC inactive, randomization, block size 10, repl 1")
mi_ntrc_inactive_10_2 = _compute_mi(sa_ntrc_inactive_10_2, "NtrC inactive, randomization, block size 10, repl 2")
mi_ntrc_inactive_10_3 = _compute_mi(sa_ntrc_inactive_10_3, "NtrC inactive, randomization, block size 10, repl 3")
mi_ntrc_active_100_1 = _compute_mi(sa_ntrc_active_100_1, "NtrC active, randomization, block size 100, repl 1")
mi_ntrc_active_100_2 = _compute_mi(sa_ntrc_active_100_2, "NtrC active, randomization, block size 100, repl 2")
mi_ntrc_active_100_3 = _compute_mi(sa_ntrc_active_100_3, "NtrC active, randomization, block size 100, repl 3")
mi_ntrc_active_50_1 = _compute_mi(sa_ntrc_active_50_1, "NtrC active, randomization, block size 50, repl 1")
mi_ntrc_active_50_2 = _compute_mi(sa_ntrc_active_50_2, "NtrC active, randomization, block size 50, repl 2")
mi_ntrc_active_50_3 = _compute_mi(sa_ntrc_active_50_3, "NtrC active, randomization, block size 50, repl 3")
mi_ntrc_active_10_1 = _compute_mi(sa_ntrc_active_10_1, "NtrC active, randomization, block size 10, repl 1")
mi_ntrc_active_10_2 = _compute_mi(sa_ntrc_active_10_2, "NtrC active, randomization, block size 10, repl 2")
mi_ntrc_active_10_3 = _compute_mi(sa_ntrc_active_10_3, "NtrC active, randomization, block size 10, repl 3")
Calculate the MI information ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:32<00:00, 3.62s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 100, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00, 3.44s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 100, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.69s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 100, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00, 2.26s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 50, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:39<00:00, 2.08s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 50, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00, 2.17s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 50, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:50<00:00, 1.72s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 10, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:40<00:00, 1.62s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 10, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:49<00:00, 1.71s/block]
TRAJECTORY COMPLETED NtrC inactive, no randomization, block size 10, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.67s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 100, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00, 3.55s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 100, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:32<00:00, 3.58s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 100, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00, 2.20s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 50, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00, 2.17s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 50, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00, 2.16s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 50, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:39<00:00, 1.61s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 10, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:46<00:00, 1.68s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 10, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:41<00:00, 1.63s/block]
TRAJECTORY COMPLETED NtrC active, no randomization, block size 10, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:29<00:00, 3.26s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 100, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.69s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 100, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00, 3.52s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 100, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00, 2.22s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 50, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00, 2.22s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 50, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00, 2.21s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 50, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:41<00:00, 1.63s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 10, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:42<00:00, 1.64s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 10, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:46<00:00, 1.68s/block]
TRAJECTORY COMPLETED NtrC inactive, randomization, block size 10, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.75s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 100, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:28<00:00, 3.16s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 100, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00, 3.49s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 100, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00, 2.20s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 50, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00, 2.22s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 50, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00, 2.20s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 50, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:45<00:00, 1.67s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 10, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:51<00:00, 1.73s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 10, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:54<00:00, 1.76s/block]
TRAJECTORY COMPLETED NtrC active, randomization, block size 10, repl 3 finished
The eigenvector decomposition of the obtained MI matrices can be used to assess convergence. The main motions of well-converged simulations should have a relatively high eigenvector overlap (>0.3).
Overlap between replicates can be used as a way to assess the reliability of the results (higher convergence = higher confidence).
# Do an eigenvector decomposition of the matrices
from tqdm import tqdm

print("Do an eigenvector decomposition of the matrices")


def _decompose_all(mi_list, label):
    """Compute the eigensystem of every MI matrix in *mi_list*, with a progress bar."""
    for mi_matrix in tqdm(mi_list, desc=label, unit="matrix"):
        mi_matrix.compute_eigensystem()


_decompose_all(mi_ntrc_inactive_100_1_norand, "Eigenvector decomposition for Ntrc 100 norand rep1")
_decompose_all(mi_ntrc_inactive_100_2_norand, "Eigenvector decomposition for Ntrc 100 norand rep2")
_decompose_all(mi_ntrc_inactive_100_3_norand, "Eigenvector decomposition for Ntrc 100 norand rep3")
_decompose_all(mi_ntrc_inactive_50_1_norand, "Eigenvector decomposition for Ntrc 50 norand rep1")
_decompose_all(mi_ntrc_inactive_50_2_norand, "Eigenvector decomposition for Ntrc 50 norand rep2")
_decompose_all(mi_ntrc_inactive_50_3_norand, "Eigenvector decomposition for Ntrc 50 norand rep3")
_decompose_all(mi_ntrc_inactive_10_1_norand, "Eigenvector decomposition for Ntrc 10 norand rep1")
_decompose_all(mi_ntrc_inactive_10_2_norand, "Eigenvector decomposition for Ntrc 10 norand rep2")
_decompose_all(mi_ntrc_inactive_10_3_norand, "Eigenvector decomposition for Ntrc 10 norand rep3")
_decompose_all(mi_ntrc_inactive_100_1, "Eigenvector decomposition for Ntrc 100 rand rep1")
_decompose_all(mi_ntrc_inactive_100_2, "Eigenvector decomposition for Ntrc 100 rand rep2")
_decompose_all(mi_ntrc_inactive_100_3, "Eigenvector decomposition for Ntrc 100 rand rep3")
_decompose_all(mi_ntrc_inactive_50_1, "Eigenvector decomposition for Ntrc 50 rand rep1")
_decompose_all(mi_ntrc_inactive_50_2, "Eigenvector decomposition for Ntrc 50 rand rep2")
_decompose_all(mi_ntrc_inactive_50_3, "Eigenvector decomposition for Ntrc 50 rand rep3")
_decompose_all(mi_ntrc_inactive_10_1, "Eigenvector decomposition for Ntrc 10 rand rep1")
_decompose_all(mi_ntrc_inactive_10_2, "Eigenvector decomposition for Ntrc 10 rand rep2")
_decompose_all(mi_ntrc_inactive_10_3, "Eigenvector decomposition for Ntrc 10 rand rep3")
_decompose_all(mi_ntrc_active_100_1_norand, "Eigenvector decomposition for Ntrc 100 act norand rep1")
_decompose_all(mi_ntrc_active_100_2_norand, "Eigenvector decomposition for Ntrc 100 act norand rep2")
_decompose_all(mi_ntrc_active_100_3_norand, "Eigenvector decomposition for Ntrc 100 act norand rep3")
_decompose_all(mi_ntrc_active_50_1_norand, "Eigenvector decomposition for Ntrc 50 act norand rep1")
_decompose_all(mi_ntrc_active_50_2_norand, "Eigenvector decomposition for Ntrc 50 act norand rep2")
_decompose_all(mi_ntrc_active_50_3_norand, "Eigenvector decomposition for Ntrc 50 act norand rep3")
_decompose_all(mi_ntrc_active_10_1_norand, "Eigenvector decomposition for Ntrc 10 act norand rep1")
_decompose_all(mi_ntrc_active_10_2_norand, "Eigenvector decomposition for Ntrc 10 act norand rep2")
# NOTE: the remaining labels reproduce the original (slightly inconsistent) wording.
_decompose_all(mi_ntrc_active_10_3_norand, "Eigenvector decomposition for Ntrc act 10 norand rep3")
_decompose_all(mi_ntrc_active_100_1, "Eigenvector decomposition for Ntrc act 100 rand rep1")
_decompose_all(mi_ntrc_active_100_2, "Eigenvector decomposition for Ntrc act 100 rand rep2")
_decompose_all(mi_ntrc_active_100_3, "Eigenvector decomposition for Ntrc act 100 rand rep3")
_decompose_all(mi_ntrc_active_50_1, "Eigenvector decomposition for Ntrc act 50 rand rep1")
_decompose_all(mi_ntrc_active_50_2, "Eigenvector decomposition for Ntrc act 50 rand rep2")
_decompose_all(mi_ntrc_active_50_3, "Eigenvector decomposition for Ntrc act 50 rand rep3")
_decompose_all(mi_ntrc_active_10_1, "Eigenvector decomposition for Ntrc act 10 rand rep1")
_decompose_all(mi_ntrc_active_10_2, "Eigenvector decomposition for Ntrc act 10 rand rep2")
_decompose_all(mi_ntrc_active_10_3, "Eigenvector decomposition for Ntrc act 10 rand rep3")
Do an eigenvector decomposition of the matrices
Eigenvector decomposition for Ntrc 100 norand rep1: 0%| | 0/9 [00:00<?, ?matrix/s]Eigenvector decomposition for Ntrc 100 norand rep1: 100%|██████████| 9/9 [00:00<00:00, 54.28matrix/s] Eigenvector decomposition for Ntrc 100 norand rep2: 100%|██████████| 9/9 [00:00<00:00, 105.99matrix/s] Eigenvector decomposition for Ntrc 100 norand rep3: 100%|██████████| 9/9 [00:00<00:00, 105.23matrix/s] Eigenvector decomposition for Ntrc 50 norand rep1: 100%|██████████| 19/19 [00:00<00:00, 35.71matrix/s] Eigenvector decomposition for Ntrc 50 norand rep2: 100%|██████████| 19/19 [00:00<00:00, 196.42matrix/s] Eigenvector decomposition for Ntrc 50 norand rep3: 100%|██████████| 19/19 [00:00<00:00, 214.76matrix/s] Eigenvector decomposition for Ntrc 10 norand rep1: 100%|██████████| 99/99 [00:00<00:00, 239.07matrix/s] Eigenvector decomposition for Ntrc 10 norand rep2: 100%|██████████| 99/99 [00:00<00:00, 364.18matrix/s] Eigenvector decomposition for Ntrc 10 norand rep3: 100%|██████████| 99/99 [00:00<00:00, 487.15matrix/s] Eigenvector decomposition for Ntrc 100 rand rep1: 100%|██████████| 9/9 [00:00<00:00, 363.07matrix/s] Eigenvector decomposition for Ntrc 100 rand rep2: 100%|██████████| 9/9 [00:00<00:00, 324.89matrix/s] Eigenvector decomposition for Ntrc 100 rand rep3: 100%|██████████| 9/9 [00:00<00:00, 304.61matrix/s] Eigenvector decomposition for Ntrc 50 rand rep1: 100%|██████████| 19/19 [00:00<00:00, 676.68matrix/s] Eigenvector decomposition for Ntrc 50 rand rep2: 100%|██████████| 19/19 [00:00<00:00, 627.87matrix/s] Eigenvector decomposition for Ntrc 50 rand rep3: 100%|██████████| 19/19 [00:00<00:00, 681.67matrix/s] Eigenvector decomposition for Ntrc 10 rand rep1: 100%|██████████| 99/99 [00:00<00:00, 541.86matrix/s] Eigenvector decomposition for Ntrc 10 rand rep2: 100%|██████████| 99/99 [00:00<00:00, 679.07matrix/s] Eigenvector decomposition for Ntrc 10 rand rep3: 100%|██████████| 99/99 [00:00<00:00, 745.75matrix/s] Eigenvector decomposition for Ntrc 100 act norand rep1: 
100%|██████████| 9/9 [00:00<00:00, 623.61matrix/s] Eigenvector decomposition for Ntrc 100 act norand rep2: 100%|██████████| 9/9 [00:00<00:00, 730.59matrix/s] Eigenvector decomposition for Ntrc 100 act norand rep3: 100%|██████████| 9/9 [00:00<00:00, 326.91matrix/s] Eigenvector decomposition for Ntrc 50 act norand rep1: 100%|██████████| 19/19 [00:00<00:00, 384.88matrix/s] Eigenvector decomposition for Ntrc 50 act norand rep2: 100%|██████████| 19/19 [00:00<00:00, 344.99matrix/s] Eigenvector decomposition for Ntrc 50 act norand rep3: 100%|██████████| 19/19 [00:00<00:00, 496.98matrix/s] Eigenvector decomposition for Ntrc 10 act norand rep1: 100%|██████████| 99/99 [00:00<00:00, 886.82matrix/s] Eigenvector decomposition for Ntrc 10 act norand rep2: 100%|██████████| 99/99 [00:00<00:00, 661.50matrix/s] Eigenvector decomposition for Ntrc act 10 norand rep3: 100%|██████████| 99/99 [00:00<00:00, 569.02matrix/s] Eigenvector decomposition for Ntrc act 100 rand rep1: 100%|██████████| 9/9 [00:00<00:00, 462.83matrix/s] Eigenvector decomposition for Ntrc act 100 rand rep2: 100%|██████████| 9/9 [00:00<00:00, 393.17matrix/s] Eigenvector decomposition for Ntrc act 100 rand rep3: 100%|██████████| 9/9 [00:00<00:00, 401.86matrix/s] Eigenvector decomposition for Ntrc act 50 rand rep1: 100%|██████████| 19/19 [00:00<00:00, 314.98matrix/s] Eigenvector decomposition for Ntrc act 50 rand rep2: 100%|██████████| 19/19 [00:00<00:00, 795.24matrix/s] Eigenvector decomposition for Ntrc act 50 rand rep3: 100%|██████████| 19/19 [00:00<00:00, 809.10matrix/s] Eigenvector decomposition for Ntrc act 10 rand rep1: 100%|██████████| 99/99 [00:00<00:00, 632.15matrix/s] Eigenvector decomposition for Ntrc act 10 rand rep2: 100%|██████████| 99/99 [00:00<00:00, 891.42matrix/s] Eigenvector decomposition for Ntrc act 10 rand rep3: 100%|██████████| 99/99 [00:00<00:00, 537.66matrix/s]
The overlap can now be computed using the Overlap object.
In this analysis we use the top 3 eigenvectors, which should explain most of the observed variability.
# Create the overlap handlers to compute similarities between the trajectories.
# Every handler receives the three inactive replicas followed by the three
# active replicas, and uses the top 3 eigenvectors (ev_list=[0, 1, 2]).
def _make_overlap(mi_matrices):
    """Build an Overlap handler for *mi_matrices* and fill its overlap matrix.

    Parameters
    ----------
    mi_matrices : list
        Mutual-information handlers, ordered inactive repl 1-3 then active repl 1-3.

    Returns
    -------
    Overlap.Overlap
        Handler with its overlap matrix already computed.
    """
    handler = Overlap.Overlap(mi_matrices, ev_list=[0, 1, 2])
    handler.fill_overlap_matrix()
    return handler

# Random 100
overlap_100 = _make_overlap([mi_ntrc_inactive_100_1, mi_ntrc_inactive_100_2, mi_ntrc_inactive_100_3,
                             mi_ntrc_active_100_1, mi_ntrc_active_100_2, mi_ntrc_active_100_3])
# No Random 100
overlap_100_norand = _make_overlap([mi_ntrc_inactive_100_1_norand, mi_ntrc_inactive_100_2_norand, mi_ntrc_inactive_100_3_norand,
                                    mi_ntrc_active_100_1_norand, mi_ntrc_active_100_2_norand, mi_ntrc_active_100_3_norand])
# Random 50
overlap_50 = _make_overlap([mi_ntrc_inactive_50_1, mi_ntrc_inactive_50_2, mi_ntrc_inactive_50_3,
                            mi_ntrc_active_50_1, mi_ntrc_active_50_2, mi_ntrc_active_50_3])
# No Random 50
overlap_50_norand = _make_overlap([mi_ntrc_inactive_50_1_norand, mi_ntrc_inactive_50_2_norand, mi_ntrc_inactive_50_3_norand,
                                   mi_ntrc_active_50_1_norand, mi_ntrc_active_50_2_norand, mi_ntrc_active_50_3_norand])
# Random 10
overlap_10 = _make_overlap([mi_ntrc_inactive_10_1, mi_ntrc_inactive_10_2, mi_ntrc_inactive_10_3,
                            mi_ntrc_active_10_1, mi_ntrc_active_10_2, mi_ntrc_active_10_3])
# No Random 10
overlap_10_norand = _make_overlap([mi_ntrc_inactive_10_1_norand, mi_ntrc_inactive_10_2_norand, mi_ntrc_inactive_10_3_norand,
                                   mi_ntrc_active_10_1_norand, mi_ntrc_active_10_2_norand, mi_ntrc_active_10_3_norand])
The results can now be plotted to examine the convergence
# Plot every overlap matrix to visually inspect convergence. Order:
# block 100 (randomized, then non-randomized), block 50, block 10.
for handler in (overlap_100, overlap_100_norand,
                overlap_50, overlap_50_norand,
                overlap_10, overlap_10_norand):
    Allohub_plots.plot_overlap(handler.get_overlap_matrix(), vmax=0.4, action="show")
# Compute the pairwise similarity matrices for every block size, with and
# without block randomization. Each matrix holds one similarity value per
# pair of the six trajectories (0-2 inactive, 3-5 active).
similarity_matrix_100_norand = overlap_100_norand.compute_similarities()
similarity_matrix_100 = overlap_100.compute_similarities()
similarity_matrix_50_norand = overlap_50_norand.compute_similarities()
similarity_matrix_50 = overlap_50.compute_similarities()
similarity_matrix_10_norand = overlap_10_norand.compute_similarities()
similarity_matrix_10 = overlap_10.compute_similarities()
SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.1965 Overlap of trajectory 1 with itself: 0.3076 Overlap of trajectory 2 with itself: 0.3663 Similary 0.8596 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.1925 Overlap of trajectory 1 with itself: 0.3076 Overlap of trajectory 2 with itself: 0.2977 Similary 0.8898 SIMILARITIES BETWEEN TRAJECTORY 0 and 3 Overlap between trajectories: 0.1557 Overlap of trajectory 1 with itself: 0.3076 Overlap of trajectory 2 with itself: 0.3256 Similary 0.8391 SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.1438 Overlap of trajectory 1 with itself: 0.3076 Overlap of trajectory 2 with itself: 0.3039 Similary 0.8381 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.1421 Overlap of trajectory 1 with itself: 0.3076 Overlap of trajectory 2 with itself: 0.2909 Similary 0.8428 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.1934 Overlap of trajectory 1 with itself: 0.3663 Overlap of trajectory 2 with itself: 0.2977 Similary 0.8614 SIMILARITIES BETWEEN TRAJECTORY 1 and 3 Overlap between trajectories: 0.1499 Overlap of trajectory 1 with itself: 0.3663 Overlap of trajectory 2 with itself: 0.3256 Similary 0.804 SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.1562 Overlap of trajectory 1 with itself: 0.3663 Overlap of trajectory 2 with itself: 0.3039 Similary 0.8212 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.17 Overlap of trajectory 1 with itself: 0.3663 Overlap of trajectory 2 with itself: 0.2909 Similary 0.8414 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.1575 Overlap of trajectory 1 with itself: 0.2977 Overlap of trajectory 2 with itself: 0.3256 Similary 0.8458 SIMILARITIES BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.1559 Overlap of trajectory 1 with itself: 0.2977 Overlap of trajectory 2 with itself: 0.3039 Similary 0.8551 
SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.1455 Overlap of trajectory 1 with itself: 0.2977 Overlap of trajectory 2 with itself: 0.2909 Similary 0.8512 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.1514 Overlap of trajectory 1 with itself: 0.3256 Overlap of trajectory 2 with itself: 0.3039 Similary 0.8367 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 Overlap between trajectories: 0.1412 Overlap of trajectory 1 with itself: 0.3256 Overlap of trajectory 2 with itself: 0.2909 Similary 0.8329 SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.1612 Overlap of trajectory 1 with itself: 0.3039 Overlap of trajectory 2 with itself: 0.2909 Similary 0.8638 SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.2601 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5491 Similary 0.7317 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.2776 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.4596 Similary 0.7939 SIMILARITIES BETWEEN TRAJECTORY 0 and 3 Overlap between trajectories: 0.2513 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5566 Similary 0.7191 SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.2028 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5372 Similary 0.6803 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.2106 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6988 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.2559 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.4596 Similary 0.7515 SIMILARITIES BETWEEN TRAJECTORY 1 and 3 Overlap between trajectories: 0.1944 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.5566 Similary 0.6416 
SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.2403 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.5372 Similary 0.6971 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.2074 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6749 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.2186 Overlap of trajectory 1 with itself: 0.4596 Overlap of trajectory 2 with itself: 0.5566 Similary 0.7105 SIMILARITIES BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.199 Overlap of trajectory 1 with itself: 0.4596 Overlap of trajectory 2 with itself: 0.5372 Similary 0.7006 SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.1805 Overlap of trajectory 1 with itself: 0.4596 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6928 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.2327 Overlap of trajectory 1 with itself: 0.5566 Overlap of trajectory 2 with itself: 0.5372 Similary 0.6858 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 Overlap between trajectories: 0.2145 Overlap of trajectory 1 with itself: 0.5566 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6784 SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.2812 Overlap of trajectory 1 with itself: 0.5372 Overlap of trajectory 2 with itself: 0.5158 Similary 0.7547 SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.1842 Overlap of trajectory 1 with itself: 0.2578 Overlap of trajectory 2 with itself: 0.2693 Similary 0.9206 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.195 Overlap of trajectory 1 with itself: 0.2578 Overlap of trajectory 2 with itself: 0.2529 Similary 0.9396 SIMILARITIES BETWEEN TRAJECTORY 0 and 3 Overlap between trajectories: 0.1604 Overlap of trajectory 1 with itself: 0.2578 Overlap of trajectory 2 with itself: 0.2626 Similary 0.9002 
SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.1498 Overlap of trajectory 1 with itself: 0.2578 Overlap of trajectory 2 with itself: 0.2613 Similary 0.8902 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.1539 Overlap of trajectory 1 with itself: 0.2578 Overlap of trajectory 2 with itself: 0.2416 Similary 0.9042 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.1809 Overlap of trajectory 1 with itself: 0.2693 Overlap of trajectory 2 with itself: 0.2529 Similary 0.9198 SIMILARITIES BETWEEN TRAJECTORY 1 and 3 Overlap between trajectories: 0.1513 Overlap of trajectory 1 with itself: 0.2693 Overlap of trajectory 2 with itself: 0.2626 Similary 0.8854 SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.1518 Overlap of trajectory 1 with itself: 0.2693 Overlap of trajectory 2 with itself: 0.2613 Similary 0.8864 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.1625 Overlap of trajectory 1 with itself: 0.2693 Overlap of trajectory 2 with itself: 0.2416 Similary 0.9071 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.1596 Overlap of trajectory 1 with itself: 0.2529 Overlap of trajectory 2 with itself: 0.2626 Similary 0.9018 SIMILARITIES BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.156 Overlap of trajectory 1 with itself: 0.2529 Overlap of trajectory 2 with itself: 0.2613 Similary 0.8989 SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.1534 Overlap of trajectory 1 with itself: 0.2529 Overlap of trajectory 2 with itself: 0.2416 Similary 0.9061 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.1476 Overlap of trajectory 1 with itself: 0.2626 Overlap of trajectory 2 with itself: 0.2613 Similary 0.8856 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 Overlap between trajectories: 0.1423 Overlap of trajectory 1 with itself: 0.2626 Overlap of trajectory 2 with itself: 0.2416 Similary 0.8902 
SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.1628 Overlap of trajectory 1 with itself: 0.2613 Overlap of trajectory 2 with itself: 0.2416 Similary 0.9114 SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.2513 Overlap of trajectory 1 with itself: 0.4236 Overlap of trajectory 2 with itself: 0.4495 Similary 0.8147 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.281 Overlap of trajectory 1 with itself: 0.4236 Overlap of trajectory 2 with itself: 0.4026 Similary 0.8679 SIMILARITIES BETWEEN TRAJECTORY 0 and 3 Overlap between trajectories: 0.2443 Overlap of trajectory 1 with itself: 0.4236 Overlap of trajectory 2 with itself: 0.4281 Similary 0.8184 SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.2012 Overlap of trajectory 1 with itself: 0.4236 Overlap of trajectory 2 with itself: 0.401 Similary 0.7889 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.222 Overlap of trajectory 1 with itself: 0.4236 Overlap of trajectory 2 with itself: 0.4349 Similary 0.7927 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.2615 Overlap of trajectory 1 with itself: 0.4495 Overlap of trajectory 2 with itself: 0.4026 Similary 0.8355 SIMILARITIES BETWEEN TRAJECTORY 1 and 3 Overlap between trajectories: 0.199 Overlap of trajectory 1 with itself: 0.4495 Overlap of trajectory 2 with itself: 0.4281 Similary 0.7602 SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.2272 Overlap of trajectory 1 with itself: 0.4495 Overlap of trajectory 2 with itself: 0.401 Similary 0.8019 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.2189 Overlap of trajectory 1 with itself: 0.4495 Overlap of trajectory 2 with itself: 0.4349 Similary 0.7767 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.222 Overlap of trajectory 1 with itself: 0.4026 Overlap of trajectory 2 with itself: 0.4281 Similary 0.8067 SIMILARITIES 
BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.2038 Overlap of trajectory 1 with itself: 0.4026 Overlap of trajectory 2 with itself: 0.401 Similary 0.802 SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.1988 Overlap of trajectory 1 with itself: 0.4026 Overlap of trajectory 2 with itself: 0.4349 Similary 0.7801 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.2175 Overlap of trajectory 1 with itself: 0.4281 Overlap of trajectory 2 with itself: 0.401 Similary 0.8029 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 Overlap between trajectories: 0.2072 Overlap of trajectory 1 with itself: 0.4281 Overlap of trajectory 2 with itself: 0.4349 Similary 0.7757 SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.2628 Overlap of trajectory 1 with itself: 0.401 Overlap of trajectory 2 with itself: 0.4349 Similary 0.8448 SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.2118 Overlap of trajectory 1 with itself: 0.2575 Overlap of trajectory 2 with itself: 0.2411 Similary 0.9625 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.2274 Overlap of trajectory 1 with itself: 0.2575 Overlap of trajectory 2 with itself: 0.2545 Similary 0.9713 SIMILARITIES BETWEEN TRAJECTORY 0 and 3 Overlap between trajectories: 0.2015 Overlap of trajectory 1 with itself: 0.2575 Overlap of trajectory 2 with itself: 0.2576 Similary 0.944 SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.1935 Overlap of trajectory 1 with itself: 0.2575 Overlap of trajectory 2 with itself: 0.2667 Similary 0.9314 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.2123 Overlap of trajectory 1 with itself: 0.2575 Overlap of trajectory 2 with itself: 0.2584 Similary 0.9543 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.2195 Overlap of trajectory 1 with itself: 0.2411 Overlap of trajectory 2 with itself: 0.2545 Similary 0.9717 SIMILARITIES BETWEEN 
TRAJECTORY 1 and 3 Overlap between trajectories: 0.1934 Overlap of trajectory 1 with itself: 0.2411 Overlap of trajectory 2 with itself: 0.2576 Similary 0.944 SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.1972 Overlap of trajectory 1 with itself: 0.2411 Overlap of trajectory 2 with itself: 0.2667 Similary 0.9433 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.205 Overlap of trajectory 1 with itself: 0.2411 Overlap of trajectory 2 with itself: 0.2584 Similary 0.9553 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.2047 Overlap of trajectory 1 with itself: 0.2545 Overlap of trajectory 2 with itself: 0.2576 Similary 0.9486 SIMILARITIES BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.1986 Overlap of trajectory 1 with itself: 0.2545 Overlap of trajectory 2 with itself: 0.2667 Similary 0.9379 SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.2131 Overlap of trajectory 1 with itself: 0.2545 Overlap of trajectory 2 with itself: 0.2584 Similary 0.9566 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.1956 Overlap of trajectory 1 with itself: 0.2576 Overlap of trajectory 2 with itself: 0.2667 Similary 0.9335 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 Overlap between trajectories: 0.2054 Overlap of trajectory 1 with itself: 0.2576 Overlap of trajectory 2 with itself: 0.2584 Similary 0.9474 SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.2093 Overlap of trajectory 1 with itself: 0.2667 Overlap of trajectory 2 with itself: 0.2584 Similary 0.9467 SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.2709 Overlap of trajectory 1 with itself: 0.3793 Overlap of trajectory 2 with itself: 0.3618 Similary 0.9003 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.2967 Overlap of trajectory 1 with itself: 0.3793 Overlap of trajectory 2 with itself: 0.362 Similary 0.9261 SIMILARITIES BETWEEN TRAJECTORY 
0 and 3 Overlap between trajectories: 0.2572 Overlap of trajectory 1 with itself: 0.3793 Overlap of trajectory 2 with itself: 0.3568 Similary 0.8892 SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.2287 Overlap of trajectory 1 with itself: 0.3793 Overlap of trajectory 2 with itself: 0.3767 Similary 0.8507 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.2411 Overlap of trajectory 1 with itself: 0.3793 Overlap of trajectory 2 with itself: 0.3382 Similary 0.8824 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.2855 Overlap of trajectory 1 with itself: 0.3618 Overlap of trajectory 2 with itself: 0.362 Similary 0.9236 SIMILARITIES BETWEEN TRAJECTORY 1 and 3 Overlap between trajectories: 0.2247 Overlap of trajectory 1 with itself: 0.3618 Overlap of trajectory 2 with itself: 0.3568 Similary 0.8654 SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.2393 Overlap of trajectory 1 with itself: 0.3618 Overlap of trajectory 2 with itself: 0.3767 Similary 0.8701 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.2309 Overlap of trajectory 1 with itself: 0.3618 Overlap of trajectory 2 with itself: 0.3382 Similary 0.8809 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.2481 Overlap of trajectory 1 with itself: 0.362 Overlap of trajectory 2 with itself: 0.3568 Similary 0.8887 SIMILARITIES BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.225 Overlap of trajectory 1 with itself: 0.362 Overlap of trajectory 2 with itself: 0.3767 Similary 0.8557 SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.2332 Overlap of trajectory 1 with itself: 0.362 Overlap of trajectory 2 with itself: 0.3382 Similary 0.8831 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.2339 Overlap of trajectory 1 with itself: 0.3568 Overlap of trajectory 2 with itself: 0.3767 Similary 0.8672 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 
Overlap between trajectories: 0.2303 Overlap of trajectory 1 with itself: 0.3568 Overlap of trajectory 2 with itself: 0.3382 Similary 0.8828 SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.2609 Overlap of trajectory 1 with itself: 0.3767 Overlap of trajectory 2 with itself: 0.3382 Similary 0.9034
# Statistical significance of convergence for each block size.
# Trajectories 0, 1, 2 are the inactive condition and 3, 4, 5 the active one.
def _within_between_pvalue(sim):
    """One-sided Welch t-test that within-condition similarities exceed
    between-condition similarities.

    Parameters
    ----------
    sim : 2D indexable
        6x6 similarity matrix; rows/columns 0-2 are condition-1 replicas
        and 3-5 are condition-2 replicas.

    Returns
    -------
    float
        p-value of the one-sided (alternative='greater') Welch t-test.
    """
    # Upper-triangle entries among replicas of the same condition.
    within = [sim[0][1], sim[0][2], sim[1][2],
              sim[3][4], sim[3][5], sim[4][5]]
    # All cross-condition pairs.
    between = [sim[i][j] for i in (0, 1, 2) for j in (3, 4, 5)]
    # equal_var=False -> Welch's test; does not assume equal variances.
    stat, p_value = ttest_ind(within, between, equal_var=False, alternative='greater')
    return p_value

# No randomization group
for block, sim in ((100, similarity_matrix_100_norand),
                   (50, similarity_matrix_50_norand),
                   (10, similarity_matrix_10_norand)):
    print(f"p-value of convergence within vs between conditions for the non randomization encoded trajectory with block {block} is {_within_between_pvalue(sim)}")
# Randomization group
for block, sim in ((100, similarity_matrix_100),
                   (50, similarity_matrix_50),
                   (10, similarity_matrix_10)):
    print(f"p-value of convergence within vs between conditions for the randomized encoded trajectory with block {block} is {_within_between_pvalue(sim)}")
p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 100 is 0.05317949717130676 p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 50 is 0.09275176445640934 p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 10 is 0.05303464738267857 p-value of convergence within vs between conditions for the randomized encoded trajectory with block 100 is 0.004525261989032756 p-value of convergence within vs between conditions for the randomized encoded trajectory with block 50 is 0.02122564681572018 p-value of convergence within vs between conditions for the randomized encoded trajectory with block 10 is 0.03040943603611007
# Create a box plot of within- vs between-condition similarities for the
# different block sizes (with and without randomization).
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Long-format records for seaborn.
data = {
    "Block Size": [],
    "Group": [],
    "Similarity": []
}
# Condition of each trajectory index: 0-2 inactive, 3-5 active.
group = [0, 0, 0, 1, 1, 1]
# Mapping of plot labels to similarity matrices.
sim_map = {"100": similarity_matrix_100_norand, "50": similarity_matrix_50_norand, "10": similarity_matrix_10_norand,
           "100 rand": similarity_matrix_100, "50 rand": similarity_matrix_50, "10 rand": similarity_matrix_10}
# Collect the strict upper triangle (i < j) of every similarity matrix;
# starting j at i + 1 skips the diagonal without an explicit check.
for key, matrix_to_use in sim_map.items():
    for i in range(6):  # 6 simulations
        for j in range(i + 1, 6):
            label = "Within conditions" if group[i] == group[j] else "Between conditions"
            data["Block Size"].append(key)
            data["Group"].append(label)
            data["Similarity"].append(matrix_to_use[i][j])
# Convert data to DataFrame
df = pd.DataFrame(data)
# Set Seaborn style
sns.set_theme(style="whitegrid", palette="crest")
# Create the boxplot
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x="Block Size", y="Similarity", hue="Group")
# Add labels
plt.xlabel("Block Size", fontsize=12)
plt.ylabel("Similarity", fontsize=12)
plt.legend(title="Group")
# Show the plot
plt.show()
The similarity (convergence) for the non-randomized encoding at block size 10 is barely significant (p-value ≈ 0.05), while randomizing the blocks greatly improved the convergence of the signal, with p-values < 0.05 for all block sizes.
Finally, we will compare the top hits for the different encoding sizes, with and without randomization.
# Find upregulated and downregulated fragments
print("Find upregulated and downregulated fragments for randomized 100")

def _condition_pair_fragments(overlap_handler):
    """Return the up/down-regulated fragment table for condition pair (0, 1).

    Parameters
    ----------
    overlap_handler : Overlap.Overlap
        Handler whose trajectories 0-2 belong to condition 0 and 3-5 to
        condition 1 (see traj_mapping below).

    Returns
    -------
    The (0, 1) entry of the dictionary produced by updown_regulation().
    The dictionary is keyed by condition pairs; with only two conditions
    (0, 1) is the single pairing. More conditions would also yield
    (0, 2), (1, 2), ...
    """
    fragments = overlap_handler.updown_regulation(traj_mapping=[0, 0, 0, 1, 1, 1], splitting=True)
    return fragments[(0, 1)]

# 100 block: random / no random
t12_updown_100 = _condition_pair_fragments(overlap_100)
t12_updown_100_norand = _condition_pair_fragments(overlap_100_norand)
# 50 block: random / no random
t12_updown_50 = _condition_pair_fragments(overlap_50)
t12_updown_50_norand = _condition_pair_fragments(overlap_50_norand)
# 10 block: random / no random
t12_updown_10 = _condition_pair_fragments(overlap_10)
t12_updown_10_norand = _condition_pair_fragments(overlap_10_norand)
Find upregulated and downregulated fragments for randomized 100
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) -0.522549 1.339345e-02 0.034581
1 (0, 2) -1.094437 9.572307e-07 0.000008
2 (0, 3) -0.218183 2.442496e-01 0.361814
3 (0, 4) -0.760571 5.631604e-03 0.016565
4 (0, 5) -0.766811 1.257980e-03 0.004601
... ... ... ... ...
7255 (117, 119) 0.375417 3.133037e-01 0.434994
7256 (117, 120) 0.276478 3.662286e-01 0.487410
7257 (118, 119) 1.172490 2.578103e-02 0.059703
7258 (118, 120) 1.076641 2.037327e-02 0.049140
7259 (119, 120) -0.398515 1.525957e-02 0.038520
[7260 rows x 4 columns]
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) 0.454542 0.198620 0.430700
1 (0, 2) 0.131890 0.747486 0.870648
2 (0, 3) 0.976979 0.002844 0.025323
3 (0, 4) 0.259348 0.405593 0.626112
4 (0, 5) -0.106488 0.726527 0.859629
... ... ... ... ...
7255 (117, 119) 0.020499 0.965923 0.983121
7256 (117, 120) 0.047135 0.918079 0.961104
7257 (118, 119) -0.334203 0.536086 0.729929
7258 (118, 120) -0.144257 0.777131 0.886405
7259 (119, 120) -0.389978 0.228897 0.467190
[7260 rows x 4 columns]
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) -0.523155 3.096334e-04 9.639531e-04
1 (0, 2) -0.948512 1.584162e-11 1.390691e-10
2 (0, 3) -0.135474 3.171945e-01 4.139551e-01
3 (0, 4) -0.725702 6.263658e-04 1.813887e-03
4 (0, 5) -0.813870 3.087299e-05 1.154755e-04
... ... ... ... ...
7255 (117, 119) 0.358255 1.288376e-01 1.975419e-01
7256 (117, 120) 0.352574 9.379584e-02 1.513238e-01
7257 (118, 119) 0.929538 3.485537e-03 8.466041e-03
7258 (118, 120) 0.818631 4.451067e-03 1.055692e-02
7259 (119, 120) -0.490005 5.990874e-05 2.133092e-04
[7260 rows x 4 columns]
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) 0.289152 0.331329 0.533833
1 (0, 2) 0.237128 0.360665 0.560899
2 (0, 3) 0.776993 0.000375 0.003544
3 (0, 4) 0.250453 0.346289 0.547368
4 (0, 5) -0.121626 0.580416 0.748459
... ... ... ... ...
7255 (117, 119) -0.109493 0.723493 0.846479
7256 (117, 120) 0.103839 0.695979 0.826568
7257 (118, 119) -0.334476 0.332107 0.534492
7258 (118, 120) 0.072748 0.825344 0.909618
7259 (119, 120) -0.246504 0.299234 0.502762
[7260 rows x 4 columns]
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) -0.331704 1.953137e-06 4.495807e-06
1 (0, 2) -0.508715 5.727635e-13 2.282252e-12
2 (0, 3) 0.044968 6.407765e-01 6.870532e-01
3 (0, 4) -1.209684 3.350584e-13 1.356678e-12
4 (0, 5) -1.226485 5.721196e-20 3.452692e-19
... ... ... ... ...
7255 (117, 119) 0.396949 8.089016e-03 1.255639e-02
7256 (117, 120) 0.187222 1.552596e-01 1.944427e-01
7257 (118, 119) 0.741376 6.284903e-08 1.679367e-07
7258 (118, 120) 0.440293 1.055203e-03 1.838438e-03
7259 (119, 120) -0.404690 1.789773e-10 5.941359e-10
[7260 rows x 4 columns]
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) -0.768354 2.569098e-07 1.779744e-06
1 (0, 2) 0.434137 1.023072e-04 4.304868e-04
2 (0, 3) 0.262759 8.779505e-02 1.589218e-01
3 (0, 4) -1.023850 1.377697e-06 8.362941e-06
4 (0, 5) -1.127947 2.672290e-08 2.248068e-07
... ... ... ... ...
7255 (117, 119) -0.131397 5.982495e-01 7.053087e-01
7256 (117, 120) 0.213482 3.324949e-01 4.564889e-01
7257 (118, 119) -0.271250 2.447417e-01 3.625433e-01
7258 (118, 120) 0.200357 4.100453e-01 5.348417e-01
7259 (119, 120) -0.158504 1.459112e-01 2.419721e-01
[7260 rows x 4 columns]
Now we can filter the fragment pairs based on their p-value and fold change.
# Filtering parameters
pval_threshold = 0.01
fold_change_threshold = 2


def _significant(df):
    """Rows whose BH-adjusted p-value falls below pval_threshold."""
    return df[df["AdjustedPValues"] < pval_threshold]


# First extract significant fragments for every block size / encoding mode
significant_fragments_100 = _significant(t12_updown_100)
significant_fragments_100_norand = _significant(t12_updown_100_norand)
significant_fragments_50 = _significant(t12_updown_50)
significant_fragments_50_norand = _significant(t12_updown_50_norand)
significant_fragments_10 = _significant(t12_updown_10)
significant_fragments_10_norand = _significant(t12_updown_10_norand)
# Second, filter by fold change and print the top 5 per randomized block size
def _upregulated(sig_df):
    """Significantly up-regulated pairs, strongest fold change first."""
    above = sig_df["log2FoldChange"] > fold_change_threshold
    return sig_df[above].sort_values("log2FoldChange", ascending=False)


up_reg_100 = _upregulated(significant_fragments_100)  # 100 random
print(up_reg_100.head(5))
up_reg_50 = _upregulated(significant_fragments_50)  # 50 random
print(up_reg_50.head(5))
up_reg_10 = _upregulated(significant_fragments_10)  # 10 random
print(up_reg_10.head(5))
FragmentPairs log2FoldChange PValues AdjustedPValues
1776 (15, 97) 8.568563 1.143804e-12 3.533624e-11
2865 (26, 97) 5.910179 5.701885e-21 1.799812e-18
1131 (9, 97) 4.551334 1.376245e-14 6.404831e-13
6801 (90, 97) 4.501422 2.943828e-13 1.037485e-11
1020 (8, 97) 4.158793 3.712417e-10 6.224514e-09
FragmentPairs log2FoldChange PValues AdjustedPValues
2865 (26, 97) 6.894309 2.244594e-43 3.259151e-40
6692 (86, 114) 5.866300 5.037963e-05 1.819682e-04
1892 (16, 109) 5.579663 2.665850e-05 1.008025e-04
7187 (108, 114) 4.949725 3.119012e-03 7.655182e-03
1131 (9, 97) 4.948381 5.884336e-35 1.256479e-32
FragmentPairs log2FoldChange PValues AdjustedPValues
1131 (9, 97) 6.427045 2.114277e-161 2.558275e-158
2865 (26, 97) 6.059063 1.470354e-122 5.083224e-120
6587 (83, 114) 4.813141 8.800492e-28 7.782164e-27
1350 (11, 97) 4.611329 3.698651e-159 3.836029e-156
96 (0, 97) 4.547034 1.204831e-222 8.747074e-219
With the randomization approach, all three block sizes recovered similar upregulated fragments.
# Second, filter by fold change and print top 5 for the NON-randomized encodings.
# Bug fix: the original printed up_reg_100/up_reg_50/up_reg_10 (the randomized
# results) instead of the *_norand frames computed here.
# 100 non randomized
up_reg_100_norand = significant_fragments_100_norand[significant_fragments_100_norand['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_100_norand.head(5))
# 50 non randomized
up_reg_50_norand = significant_fragments_50_norand[significant_fragments_50_norand['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_50_norand.head(5))
# 10 non randomized
up_reg_10_norand = significant_fragments_10_norand[significant_fragments_10_norand['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_10_norand.head(5))
FragmentPairs log2FoldChange PValues AdjustedPValues
1776 (15, 97) 8.568563 1.143804e-12 3.533624e-11
2865 (26, 97) 5.910179 5.701885e-21 1.799812e-18
1131 (9, 97) 4.551334 1.376245e-14 6.404831e-13
6801 (90, 97) 4.501422 2.943828e-13 1.037485e-11
1020 (8, 97) 4.158793 3.712417e-10 6.224514e-09
FragmentPairs log2FoldChange PValues AdjustedPValues
2865 (26, 97) 6.894309 2.244594e-43 3.259151e-40
6692 (86, 114) 5.866300 5.037963e-05 1.819682e-04
1892 (16, 109) 5.579663 2.665850e-05 1.008025e-04
7187 (108, 114) 4.949725 3.119012e-03 7.655182e-03
1131 (9, 97) 4.948381 5.884336e-35 1.256479e-32
FragmentPairs log2FoldChange PValues AdjustedPValues
1131 (9, 97) 6.427045 2.114277e-161 2.558275e-158
2865 (26, 97) 6.059063 1.470354e-122 5.083224e-120
6587 (83, 114) 4.813141 8.800492e-28 7.782164e-27
1350 (11, 97) 4.611329 3.698651e-159 3.836029e-156
96 (0, 97) 4.547034 1.204831e-222 8.747074e-219
# Function to obtain most frequent fragments
def get_top_fragments(df_reg):
    """Count how often each fragment index appears in the given pairs.

    Args:
        df_reg: DataFrame (or mapping) with a "FragmentPairs" column holding
            (fragment_a, fragment_b) tuples.

    Returns:
        dict mapping fragment index -> appearance count, ordered by
        descending count (ties keep first-seen order).
    """
    from collections import Counter

    counts = Counter()
    for first_frag, second_frag in df_reg["FragmentPairs"]:
        counts[first_frag] += 1   # record first fragment of the pair
        counts[second_frag] += 1  # record second fragment of the pair
    # most_common sorts by count descending and is stable for ties,
    # matching a stable reverse sort over first-seen insertion order.
    return dict(counts.most_common())
# Report the 5 most frequently appearing fragments for each randomized block
# size (100, 50, 10). The original repeated this stanza three times verbatim;
# a single loop removes the copy-paste duplication.
for block_label, block_up_reg in (("100", up_reg_100), ("50", up_reg_50), ("10", up_reg_10)):
    top_fragments_count = get_top_fragments(block_up_reg)
    dict_keys = list(top_fragments_count.keys())
    print(f"NtrC {block_label} block size randomized")
    # Print top 5 most appearing fragments
    for i in range(5):
        frag = dict_keys[i]
        print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")
NtrC 100 block size randomized Fragment 90 appears 58 times. Fragment 91 appears 57 times. Fragment 89 appears 56 times. Fragment 97 appears 44 times. Fragment 92 appears 27 times. NtrC 50 block size randomized Fragment 91 appears 66 times. Fragment 89 appears 54 times. Fragment 90 appears 49 times. Fragment 97 appears 33 times. Fragment 92 appears 28 times. NtrC 10 block size randomized Fragment 91 appears 47 times. Fragment 89 appears 32 times. Fragment 92 appears 29 times. Fragment 90 appears 27 times. Fragment 97 appears 22 times.
All block sizes yielded similar results, with the same key fragments appearing among the top hits.
# Report the 5 most frequently appearing fragments for each non-randomized
# block size (100, 50, 10), collapsing the three copy-pasted stanzas into one loop.
for block_label, block_up_reg in (("100", up_reg_100_norand), ("50", up_reg_50_norand), ("10", up_reg_10_norand)):
    top_fragments_count = get_top_fragments(block_up_reg)
    dict_keys = list(top_fragments_count.keys())
    print(f"NtrC {block_label} block size non randomized")
    # Print top 5 most appearing fragments
    for i in range(5):
        frag = dict_keys[i]
        print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")
NtrC 100 block size non randomized Fragment 89 appears 20 times. Fragment 91 appears 20 times. Fragment 97 appears 19 times. Fragment 90 appears 17 times. Fragment 88 appears 12 times. NtrC 50 block size non randomized Fragment 97 appears 15 times. Fragment 91 appears 9 times. Fragment 90 appears 7 times. Fragment 89 appears 6 times. Fragment 96 appears 6 times. NtrC 10 block size non randomized Fragment 83 appears 22 times. Fragment 60 appears 19 times. Fragment 84 appears 16 times. Fragment 58 appears 9 times. Fragment 62 appears 9 times.
Still, a similar signal can be recovered for the top appearing fragments, given a long enough trajectory.
In this section, we have examined how to directly encode MD trajectories into structural alphabets and observed the improvements on consistency and reproducibility added by the addition of randomized encoding blocks.
In this next section the structural alphabets 3DI and M32K25 will be compared.
We will use a block size of 100 and randomized blocks for both alphabets. This section will recompute the whole process with the exception of the step to convert the MD into structural alphabets, as those files are already precomputed in the data folder. If the previous section has already been run, one can comment out the lines for the already precomputed objects.
# Initialize Structural Alphabet trajectory handler
print("Initialize Structural Alphabet trajectory handler")
# Set seeds for reproducibility of the randomized block selection
import random

seed = 42  # Replace with any integer
np.random.seed(seed)
random.seed(seed)
# Alphabet tables shared by the handlers below
mk_alphabet = SAtraj.ALPHABETS["M32K25"]
di_alphabet = SAtraj.ALPHABETS["3DI"]
# With randomization
sa_ntrc_active_100_1 = SAtraj.SAtraj(block_size=100, alphabet=mk_alphabet)
sa_ntrc_active_100_2 = SAtraj.SAtraj(block_size=100, alphabet=mk_alphabet)
sa_ntrc_active_100_3 = SAtraj.SAtraj(block_size=100, alphabet=mk_alphabet)
sa_ntrc_inactive_100_1 = SAtraj.SAtraj(block_size=100, alphabet=mk_alphabet)
sa_ntrc_inactive_100_2 = SAtraj.SAtraj(block_size=100, alphabet=mk_alphabet)
sa_ntrc_inactive_100_3 = SAtraj.SAtraj(block_size=100, alphabet=mk_alphabet)
sa_ntrc_active_100_1_3di = SAtraj.SAtraj(block_size=100, alphabet=di_alphabet)
sa_ntrc_active_100_2_3di = SAtraj.SAtraj(block_size=100, alphabet=di_alphabet)
sa_ntrc_active_100_3_3di = SAtraj.SAtraj(block_size=100, alphabet=di_alphabet)
sa_ntrc_inactive_100_1_3di = SAtraj.SAtraj(block_size=100, alphabet=di_alphabet)
sa_ntrc_inactive_100_2_3di = SAtraj.SAtraj(block_size=100, alphabet=di_alphabet)
sa_ntrc_inactive_100_3_3di = SAtraj.SAtraj(block_size=100, alphabet=di_alphabet)
# Load the precomputed encoded trajectories
sa_ntrc_active_100_1.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa")
sa_ntrc_active_100_2.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa")
sa_ntrc_active_100_3.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa")
sa_ntrc_inactive_100_1.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa")
sa_ntrc_inactive_100_2.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa")
sa_ntrc_inactive_100_3.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa")
sa_ntrc_active_100_1_3di.load_data("data_NtrC/encoded_NtrC_active_repl1_3di.sa")
sa_ntrc_active_100_2_3di.load_data("data_NtrC/encoded_NtrC_active_repl2_3di.sa")
sa_ntrc_active_100_3_3di.load_data("data_NtrC/encoded_NtrC_active_repl3_3di.sa")
sa_ntrc_inactive_100_1_3di.load_data("data_NtrC/encoded_NtrC_inactive_repl1_3di.sa")
sa_ntrc_inactive_100_2_3di.load_data("data_NtrC/encoded_NtrC_inactive_repl2_3di.sa")
sa_ntrc_inactive_100_3_3di.load_data("data_NtrC/encoded_NtrC_inactive_repl3_3di.sa")
Initialize Structural Alphabet trajectory handler
One can examine the encoded structure string as well as all other analysis using the provided plotting functions.
Alternatively, one can adapt the provided plotting functions for other applications. They are all located in the file Allohub_plots.py
To display the plots the argument action="show" should be used, while for saving to a file it should be action="save". If the save option is provided, the name of the file can be specified with name="my_name.png". The format of the image will depend on the format specified in the file name.
# Plot the randomized Structural Alphabet trajectories of NtrC repl1:
# active and inactive, for both the M32K25 and 3DI alphabets.
for sa_handler, alphabet_key in (
    (sa_ntrc_active_100_1, "M32K25"),
    (sa_ntrc_inactive_100_1, "M32K25"),
    (sa_ntrc_active_100_1_3di, "3DI"),
    (sa_ntrc_inactive_100_1_3di, "3DI"),
):
    Allohub_plots.plot_SA_traj(sa_handler.get_int_traj(), SAtraj.ALPHABETS[alphabet_key], action="show")
From the trajectory plots alone, one can already appreciate differences. M32K25 captures clear differences around the fragments at positions ~90 and shows small differences in stable regions, while the 3DI alphabet is better at capturing small differences across the whole protein but does not capture as well the conformational change at positions ~90.
Now we will proceed to compute the mutual information for each block. Each trajectory will produce 9 blocks of 100 ns each.
print("Calculate the MI information")
# One can specify the number of workers to parallelize the process. max_workers=None would use all available resources.
mi_ntrc_inactive_100_1 = sa_ntrc_inactive_100_1.compute_mis(max_workers=7)
print("NtrC inactive repl 1 finished")
mi_ntrc_inactive_100_2 = sa_ntrc_inactive_100_2.compute_mis(max_workers=7)
print("NtrC inactive repl 2 finished")
mi_ntrc_inactive_100_3 = sa_ntrc_inactive_100_3.compute_mis(max_workers=7)
print("NtrC inactive repl 3 finished")
mi_ntrc_active_100_1 = sa_ntrc_active_100_1.compute_mis(max_workers=7)
print("NtrC active repl 1 finished")
mi_ntrc_active_100_2 = sa_ntrc_active_100_2.compute_mis(max_workers=7)
print("NtrC active repl 2 finished")
mi_ntrc_active_100_3 = sa_ntrc_active_100_3.compute_mis(max_workers=7)
print("NtrC active repl 3 finished")
mi_ntrc_inactive_100_1_3di = sa_ntrc_inactive_100_1_3di.compute_mis(max_workers=7)
print("NtrC inactive, 3di alphabet, repl 1 finished")
mi_ntrc_inactive_100_2_3di = sa_ntrc_inactive_100_2_3di.compute_mis(max_workers=7)
print("NtrC inactive, 3di alphabet, repl 2 finished")
mi_ntrc_inactive_100_3_3di = sa_ntrc_inactive_100_3_3di.compute_mis(max_workers=7)
print("NtrC inactive, 3di alphabet, repl 3 finished")
mi_ntrc_active_100_1_3di = sa_ntrc_active_100_1_3di.compute_mis(max_workers=7)
print("NtrC active, 3di alphabet, repl 1 finished")
mi_ntrc_active_100_2_3di = sa_ntrc_active_100_2_3di.compute_mis(max_workers=7)
print("NtrC active, 3di alphabet, repl 2 finished")
mi_ntrc_active_100_3_3di = sa_ntrc_active_100_3_3di.compute_mis(max_workers=7)
print("NtrC active, 3di alphabet, repl 3 finished")
Calculate the MI information ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.69s/block]
TRAJECTORY COMPLETED NtrC inactive repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00, 3.34s/block]
TRAJECTORY COMPLETED NtrC inactive repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00, 3.55s/block]
TRAJECTORY COMPLETED NtrC inactive repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.76s/block]
TRAJECTORY COMPLETED NtrC active repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00, 3.72s/block]
TRAJECTORY COMPLETED NtrC active repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:29<00:00, 3.31s/block]
TRAJECTORY COMPLETED NtrC active repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:26<00:00, 2.93s/block]
TRAJECTORY COMPLETED NtrC inactive, 3di alphabet, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:28<00:00, 3.12s/block]
TRAJECTORY COMPLETED NtrC inactive, 3di alphabet, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00, 3.35s/block]
TRAJECTORY COMPLETED NtrC inactive, 3di alphabet, repl 3 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00, 3.39s/block]
TRAJECTORY COMPLETED NtrC active, 3di alphabet, repl 1 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:24<00:00, 2.72s/block]
TRAJECTORY COMPLETED NtrC active, 3di alphabet, repl 2 finished ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00, 3.35s/block]
TRAJECTORY COMPLETED NtrC active, 3di alphabet, repl 3 finished
Shannon entropy of the fragments gives an idea of structural flexibility. In the case of the M32K25 alphabet this analysis is complementary to Cartesian analyses such as RMSF, since fragment entropy captures local changes regardless of the magnitude of the difference, thanks to the alphabets being based on internal coordinates.
# Compute the Shannon entropy per fragment for every replicate and alphabet
print("Compute the shanon entropy")
(entropy_ntrc_active_1, entropy_ntrc_active_2, entropy_ntrc_active_3,
 entropy_ntrc_inactive_1, entropy_ntrc_inactive_2, entropy_ntrc_inactive_3,
 entropy_ntrc_active_1_3di, entropy_ntrc_active_2_3di, entropy_ntrc_active_3_3di,
 entropy_ntrc_inactive_1_3di, entropy_ntrc_inactive_2_3di, entropy_ntrc_inactive_3_3di,
 ) = (handler.compute_entropy() for handler in (
    sa_ntrc_active_100_1, sa_ntrc_active_100_2, sa_ntrc_active_100_3,
    sa_ntrc_inactive_100_1, sa_ntrc_inactive_100_2, sa_ntrc_inactive_100_3,
    sa_ntrc_active_100_1_3di, sa_ntrc_active_100_2_3di, sa_ntrc_active_100_3_3di,
    sa_ntrc_inactive_100_1_3di, sa_ntrc_inactive_100_2_3di, sa_ntrc_inactive_100_3_3di,
))
Compute the shanon entropy
# Plot per-fragment entropies with their standard deviations across the three
# replicates, for each condition (active/inactive) and alphabet (M32K25/3DI).
for entropy_triplet in (
    [entropy_ntrc_active_1, entropy_ntrc_active_2, entropy_ntrc_active_3],
    [entropy_ntrc_inactive_1, entropy_ntrc_inactive_2, entropy_ntrc_inactive_3],
    [entropy_ntrc_active_1_3di, entropy_ntrc_active_2_3di, entropy_ntrc_active_3_3di],
    [entropy_ntrc_inactive_1_3di, entropy_ntrc_inactive_2_3di, entropy_ntrc_inactive_3_3di],
):
    Allohub_plots.plot_shanon_entropy_sd(entropy_triplet, action="show", ylim=(0, 4))
The differences can be better observed by subtracting the mean entropy arrays.
def _mean_entropy(replicate_entropies):
    """Average per-fragment entropy arrays across replicates."""
    return np.stack([np.asarray(e) for e in replicate_entropies], axis=0).mean(axis=0)


# Compute the mean entropy per condition for the M32K25 alphabet
mean_act = _mean_entropy([entropy_ntrc_active_1, entropy_ntrc_active_2, entropy_ntrc_active_3])
mean_inact = _mean_entropy([entropy_ntrc_inactive_1, entropy_ntrc_inactive_2, entropy_ntrc_inactive_3])
diff = mean_act - mean_inact
# Plot the mean entropy difference (active - inactive)
Allohub_plots.plot_shanon_entropy(diff, ylim=(-3, 2.5), action="show", name="mk_entro.svg")
# Compute the mean entropy per condition for the 3di alphabet
mean_act = _mean_entropy([entropy_ntrc_active_1_3di, entropy_ntrc_active_2_3di, entropy_ntrc_active_3_3di])
mean_inact = _mean_entropy([entropy_ntrc_inactive_1_3di, entropy_ntrc_inactive_2_3di, entropy_ntrc_inactive_3_3di])
diff = mean_act - mean_inact
# Plot the mean entropy difference (active - inactive)
Allohub_plots.plot_shanon_entropy(diff, ylim=(-3, 2.5), action="show", name="entro_3di.svg")
Positions around fragment 60 and 90 are known positions that undergo conformational and flexibility changes, suggesting that the approach correctly described this effect.
Following the previous analysis, we will now extract important fragments and possible signaling path for the NtrC. For that we will use the trajectories encoded with M32K25, which managed to capture the conformation space changes with more sensitivity.
The eigenvector decomposition of the obtained MI matrices can be used to assess convergence. The main motions of well converged simulations should have a relatively high eigenvector overlap (>0.3).
Overlap between replicates can be used as a way to assess the reliability of the results (higher convergence = higher confidence).
# Do an eigenvector decomposition of the matrices
from tqdm import tqdm

print("Do an eigenvector decomposition of the matrices")
mi_replicates = (
    (mi_ntrc_active_100_1, "NtrC active 1"),
    (mi_ntrc_active_100_2, "NtrC active 2"),
    (mi_ntrc_active_100_3, "NtrC active 3"),
    (mi_ntrc_inactive_100_1, "NtrC inactive 1"),
    (mi_ntrc_inactive_100_2, "NtrC inactive 2"),
    (mi_ntrc_inactive_100_3, "NtrC inactive 3"),
)
for mi_list, label in mi_replicates:
    for mi_tr in tqdm(mi_list, desc=f"Eigenvector decomposition for {label}", unit="matrix"):
        mi_tr.compute_eigensystem()
Do an eigenvector decomposition of the matrices
Eigenvector decomposition for NtrC active 1: 100%|██████████| 9/9 [00:00<00:00, 141.42matrix/s] Eigenvector decomposition for NtrC active 2: 100%|██████████| 9/9 [00:00<00:00, 121.60matrix/s] Eigenvector decomposition for NtrC active 3: 100%|██████████| 9/9 [00:00<00:00, 200.72matrix/s] Eigenvector decomposition for NtrC inactive 1: 100%|██████████| 9/9 [00:00<00:00, 492.82matrix/s] Eigenvector decomposition for NtrC inactive 2: 100%|██████████| 9/9 [00:00<00:00, 428.64matrix/s] Eigenvector decomposition for NtrC inactive 3: 100%|██████████| 9/9 [00:00<00:00, 243.11matrix/s]
Overlap can be now computed using the Overlap object.
In this analysis we are using the top 3 eigenvectors which should explain most of the observed variability.
# Create the overlap handler to compute similarities between the trajectories
# M32K25: trajectories 0-2 are inactive replicates, 3-5 are active replicates
mi_sets_100 = [
    mi_ntrc_inactive_100_1, mi_ntrc_inactive_100_2, mi_ntrc_inactive_100_3,
    mi_ntrc_active_100_1, mi_ntrc_active_100_2, mi_ntrc_active_100_3,
]
overlap_100 = Overlap.Overlap(mi_sets_100, ev_list=[0, 1, 2])
overlap_100.fill_overlap_matrix()
# Convergence with alphabet M32K25
Allohub_plots.plot_overlap(overlap_100.get_overlap_matrix(), vmax=0.4, action="show")
# Compute similarities between overlap matrices
similarity_matrix_100 = overlap_100.compute_similarities()
SIMILARITIES BETWEEN TRAJECTORY 0 and 1 Overlap between trajectories: 0.2601 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5491 Similary 0.7317 SIMILARITIES BETWEEN TRAJECTORY 0 and 2 Overlap between trajectories: 0.2776 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.4596 Similary 0.7939 SIMILARITIES BETWEEN TRAJECTORY 0 and 3 Overlap between trajectories: 0.2513 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5566 Similary 0.7191 SIMILARITIES BETWEEN TRAJECTORY 0 and 4 Overlap between trajectories: 0.2028 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5372 Similary 0.6803 SIMILARITIES BETWEEN TRAJECTORY 0 and 5 Overlap between trajectories: 0.2106 Overlap of trajectory 1 with itself: 0.5077 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6988 SIMILARITIES BETWEEN TRAJECTORY 1 and 2 Overlap between trajectories: 0.2559 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.4596 Similary 0.7515 SIMILARITIES BETWEEN TRAJECTORY 1 and 3 Overlap between trajectories: 0.1944 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.5566 Similary 0.6416 SIMILARITIES BETWEEN TRAJECTORY 1 and 4 Overlap between trajectories: 0.2403 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.5372 Similary 0.6971 SIMILARITIES BETWEEN TRAJECTORY 1 and 5 Overlap between trajectories: 0.2074 Overlap of trajectory 1 with itself: 0.5491 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6749 SIMILARITIES BETWEEN TRAJECTORY 2 and 3 Overlap between trajectories: 0.2186 Overlap of trajectory 1 with itself: 0.4596 Overlap of trajectory 2 with itself: 0.5566 Similary 0.7105 SIMILARITIES BETWEEN TRAJECTORY 2 and 4 Overlap between trajectories: 0.199 Overlap of trajectory 1 with itself: 0.4596 Overlap of trajectory 2 with itself: 0.5372 Similary 0.7006 
SIMILARITIES BETWEEN TRAJECTORY 2 and 5 Overlap between trajectories: 0.1805 Overlap of trajectory 1 with itself: 0.4596 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6928 SIMILARITIES BETWEEN TRAJECTORY 3 and 4 Overlap between trajectories: 0.2327 Overlap of trajectory 1 with itself: 0.5566 Overlap of trajectory 2 with itself: 0.5372 Similary 0.6858 SIMILARITIES BETWEEN TRAJECTORY 3 and 5 Overlap between trajectories: 0.2145 Overlap of trajectory 1 with itself: 0.5566 Overlap of trajectory 2 with itself: 0.5158 Similary 0.6784 SIMILARITIES BETWEEN TRAJECTORY 4 and 5 Overlap between trajectories: 0.2812 Overlap of trajectory 1 with itself: 0.5372 Overlap of trajectory 2 with itself: 0.5158 Similary 0.7547
# Statistical significance of convergence for M32K25.
# Trajectories 0,1,2 are inactive replicates; 3,4,5 are active replicates.
# similarity_matrix_100[i][j] holds the overlap similarity between trajectories i and j.
from itertools import combinations

inactive_ids, active_ids = (0, 1, 2), (3, 4, 5)
# Similarities between replicates of the same condition
within_conditions = [similarity_matrix_100[i][j]
                     for group in (inactive_ids, active_ids)
                     for i, j in combinations(group, 2)]
# Similarities between replicates of different conditions
between_conditions = [similarity_matrix_100[i][j]
                      for i in inactive_ids for j in active_ids]
# One-sided Welch t-test: H1 = within-condition similarity exceeds between-condition similarity.
# (Original bound the statistic to a typo'd name `tat`; renamed to t_stat.)
t_stat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')
print(f"p-value of convergence within vs between conditions for NtrC with alphabet M32K25 is {p_value}")
p-value of convergence within vs between conditions for NtrC with alphabet M32K25 is 0.004525261989032756
The obtained p-value indicates that the convergence within replicates is significantly different from the convergence between conditions, meaning that the analysis captured a unique signal for this system.
The next step is to find up and down regulated fragments. For that, one needs the mapping of trajectories, that is to which condition each simulation belongs.
In this case we have two conditions, inactive (0) and active (1).
The argument splitting controls whether the statistics should be computed using the mean MI matrices per replicate (splitting = False) or using all the MI matrices (splitting = True).
Using all mi matrices will show more coupled fragments but will produce a higher degree of false positives.
For this analysis we will use all mi matrices.
# Find upregulated and downregulated fragments
print("Find upregulated and downregulated fragments")
# Uncorrelated pairs cause a divide-by-zero RuntimeWarning in the fold-change
# computation; silence those warnings.
import warnings

warnings.filterwarnings("ignore", category=RuntimeWarning)
updown_regulated_fragments = overlap_100.updown_regulation(traj_mapping=[0, 0, 0, 1, 1, 1], splitting=True)
# The result is keyed by condition pair; with two conditions only (0, 1) exists.
# With more conditions all pairings would appear: (0,1), (0,2), (1,2), ...
t12_updown = updown_regulated_fragments[(0, 1)]
Find upregulated and downregulated fragments
FragmentPairs log2FoldChange PValues AdjustedPValues
0 (0, 1) -0.522549 1.339345e-02 0.034581
1 (0, 2) -1.094437 9.572307e-07 0.000008
2 (0, 3) -0.218183 2.442496e-01 0.361814
3 (0, 4) -0.760571 5.631604e-03 0.016565
4 (0, 5) -0.766811 1.257980e-03 0.004601
... ... ... ... ...
7255 (117, 119) 0.375417 3.133037e-01 0.434994
7256 (117, 120) 0.276478 3.662286e-01 0.487410
7257 (118, 119) 1.172490 2.578103e-02 0.059703
7258 (118, 120) 1.076641 2.037327e-02 0.049140
7259 (119, 120) -0.398515 1.525957e-02 0.038520
[7260 rows x 4 columns]
Now we can filter the up and down regulated fragments based on a p value and fold change
# Filtering parameters
pval_threshold = 0.01
fold_change_threshold = 2
# First split each (f1, f2) pair into its own column for ease of access
fragment_pairs = t12_updown["FragmentPairs"]
t12_updown["f1"] = [pair[0] for pair in fragment_pairs]
t12_updown["f2"] = [pair[1] for pair in fragment_pairs]
# Now extract significant fragments
significant_fragments = t12_updown[t12_updown["AdjustedPValues"] < pval_threshold]
# Second, filter by fold change: keep up-regulated pairs, strongest first, and show top 10
up_mask = significant_fragments["log2FoldChange"] > fold_change_threshold
up_reg = significant_fragments[up_mask].sort_values("log2FoldChange", ascending=False)
up_reg.head(10)
| FragmentPairs | log2FoldChange | PValues | AdjustedPValues | f1 | f2 | |
|---|---|---|---|---|---|---|
| 1776 | (15, 97) | 8.568563 | 1.143804e-12 | 3.533624e-11 | 15 | 97 |
| 2865 | (26, 97) | 5.910179 | 5.701885e-21 | 1.799812e-18 | 26 | 97 |
| 1131 | (9, 97) | 4.551334 | 1.376245e-14 | 6.404831e-13 | 9 | 97 |
| 6801 | (90, 97) | 4.501422 | 2.943828e-13 | 1.037485e-11 | 90 | 97 |
| 1020 | (8, 97) | 4.158793 | 3.712417e-10 | 6.224514e-09 | 8 | 97 |
| 6290 | (76, 97) | 3.868711 | 2.552682e-21 | 9.266236e-19 | 76 | 97 |
| 6771 | (89, 97) | 3.846886 | 9.854762e-24 | 7.949508e-21 | 89 | 97 |
| 6702 | (87, 91) | 3.780066 | 5.791781e-11 | 1.161556e-09 | 87 | 91 |
| 6734 | (88, 91) | 3.757264 | 1.472253e-18 | 2.274161e-16 | 88 | 91 |
| 6858 | (92, 97) | 3.700540 | 1.155911e-22 | 5.994223e-20 | 92 | 97 |
# Show top 10 Down-regulated fragments (most negative fold change first)
down_mask = significant_fragments["log2FoldChange"] < -fold_change_threshold
down_reg = significant_fragments[down_mask].sort_values("log2FoldChange")
down_reg.head(10)
| FragmentPairs | log2FoldChange | PValues | AdjustedPValues | f1 | f2 | |
|---|---|---|---|---|---|---|
| 7000 | (97, 114) | -7.220985 | 2.519139e-04 | 1.123400e-03 | 97 | 114 |
| 6998 | (97, 112) | -5.651724 | 1.822613e-03 | 6.364679e-03 | 97 | 112 |
| 1705 | (15, 26) | -4.384441 | 1.095541e-03 | 4.095587e-03 | 15 | 26 |
| 693 | (5, 109) | -4.013189 | 4.345503e-06 | 3.111277e-05 | 5 | 109 |
| 5608 | (63, 65) | -3.802584 | 9.350739e-08 | 9.558103e-07 | 63 | 65 |
| 7095 | (102, 109) | -3.725565 | 2.503383e-03 | 8.356120e-03 | 102 | 109 |
| 2592 | (23, 109) | -3.657752 | 3.001072e-04 | 1.307790e-03 | 23 | 109 |
| 299 | (2, 63) | -3.640988 | 8.256406e-17 | 7.309940e-15 | 2 | 63 |
| 5702 | (64, 103) | -3.471873 | 8.752932e-11 | 1.667881e-09 | 64 | 103 |
| 5699 | (64, 100) | -3.443561 | 8.360845e-08 | 8.659020e-07 | 64 | 100 |
# Most frequent fragments: count how often each fragment index appears in a
# significant up- or down-regulated pair (same tally as get_top_fragments, but
# over both directions at once). Counter replaces the duplicated
# setdefault/increment stanzas; insertion order (first-seen) is preserved so
# the stable descending sort matches the original tie order.
from collections import Counter

top_fragments_count = Counter()
for df_reg in (up_reg, down_reg):
    for first_frag, second_frag in df_reg["FragmentPairs"]:
        top_fragments_count[first_frag] += 1   # record first fragment of the pair
        top_fragments_count[second_frag] += 1  # record second fragment of the pair
# sort based on counts (descending; ties keep first-seen order)
top_fragments_count = dict(sorted(top_fragments_count.items(), key=lambda item: item[1], reverse=True))
# Print top 10 most appearing fragments
dict_keys = list(top_fragments_count.keys())
for i in range(10):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")
Fragment 64 appears 62 times. Fragment 63 appears 60 times. Fragment 90 appears 58 times. Fragment 91 appears 57 times. Fragment 89 appears 56 times. Fragment 109 appears 49 times. Fragment 97 appears 46 times. Fragment 92 appears 27 times. Fragment 88 appears 23 times. Fragment 93 appears 14 times.
Interpretation
One can also create a volcano plot of the data to visualize the results
# Plot volcano plot of Up and Down regulated fragments
# fold_threshold / pvalue_threshold mark the significance cut-offs used above;
# ylim=25 presumably caps the y-axis of the plot — confirm against Allohub_plots docs
Allohub_plots.plot_updownregulation(t12_updown, fold_threshold=fold_change_threshold, ylim=25, pvalue_threshold=pval_threshold, action="show")
# Check the pair 82 - 101
sorted_significant_fragments = significant_fragments.sort_values(by="log2FoldChange", key=abs, ascending=False).reset_index(drop=False)
# Fragments that contain residue 82 would be 79-82; scan the surrounding 3x3 grid
# of candidate fragment pairs (same lookups and print order as writing them out one by one).
for second_fragment in (99, 100, 101):
    for first_fragment in (80, 81, 82):
        pair_mask = sorted_significant_fragments["FragmentPairs"] == (first_fragment, second_fragment)
        print(sorted_significant_fragments[pair_mask])
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1900 6497 (81, 99) -0.888099 0.000174 0.000818 81 99
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
2147 6535 (82, 99) -0.650953 0.001518 0.005428 82 99
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
2184 6459 (80, 100) -0.564555 0.00265 0.008773 80 100
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1748 6498 (81, 100) -1.003776 0.000003 0.000024 81 100
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1624 6536 (82, 100) -1.082251 0.00002 0.000124 82 100
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
# Check the pair 66 - 99: scan the 3x3 grid of candidate fragment pairs
# (identical lookups and print order to listing each pair explicitly)
for second_fragment in (97, 98, 99):
    for first_fragment in (64, 65, 66):
        pair_mask = sorted_significant_fragments["FragmentPairs"] == (first_fragment, second_fragment)
        print(sorted_significant_fragments[pair_mask])
# check 67 - (95,96)
for second_fragment in (94, 95, 96):
    for first_fragment in (65, 66, 67):
        pair_mask = sorted_significant_fragments["FragmentPairs"] == (first_fragment, second_fragment)
        print(sorted_significant_fragments[pair_mask])
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 \
484 5697 (64, 98) -2.0426 7.758154e-12 1.955701e-10 64
f2
484 98
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1108 5752 (65, 98) -1.4107 0.00002 0.000124 65 98
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1087 5806 (66, 98) -1.422074 0.000374 0.001587 66 98
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 \
307 5698 (64, 99) -2.308837 2.270552e-10 3.981693e-09 64
f2
307 99
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 \
460 5753 (65, 99) -2.076101 1.504028e-11 3.455458e-10 65
f2
460 99
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 \
953 5807 (66, 99) -1.546874 1.110615e-09 1.704665e-08 66
f2
953 99
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
823 5802 (66, 94) -1.6697 0.001409 0.005079 66 94
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1578 5749 (65, 95) -1.108277 0.000009 0.000062 65 95
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1119 5803 (66, 95) -1.40584 0.000331 0.001424 66 95
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1131 5750 (65, 96) -1.395014 0.000071 0.000372 65 96
index FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
596 5804 (66, 96) -1.923141 0.000133 0.000646 66 96
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Due to the 4-amino-acid size of the fragments, important fragments have to be seen as hubs of key residues rather than plain positions. To disambiguate which amino acids carry the most weight within the top fragments, we will use Protein Language Models in the next section.
From the top differentially regulated fragments we find positions with key reported residues involved in the activation of the protein. The previously reported Y101 - T82 switch appears in the significant hits, as well as other important reported structural differences such as positions ~99 and ~94 with the key structural switches of residues F99 and Y94 respectively. Interestingly, F99 and Y94 appear more consistently, highlighting their higher importance as an activation characteristic.
Among the top fragment pair hits we find previously reported relevant structural contacts, such as the K(67) - Q(96) - Q(95) (KQQ motif) and the F99-L66 pair. Other residues from the β3−α3 loop (residue indices 60−63) and from the N-terminal half of α4 (residue indices 85−90) also appear among the most important parts, matching the NMR experiments performed for this system.
To find which might be the most important residues for a given fragment, one can use a PLM such as ESM2-650M and use the predicted likelihoods as a guide. The next section will illustrate how to do that. First the distances between C-alphas need to be calculated; after that the important residues can be extracted with the SAPLM module.
# Compute distances between c alphas from a pdb
import mdtraj as md
# Load trajectory
traj = md.load("data_NtrC/NtrC_active_nowat.pdb")
# Select only the C-alpha atoms
ca_indices = traj.topology.select('name CA')
# Extract the coordinates of C-alpha atoms
ca_positions = traj.xyz[0][ca_indices]
# Number of points
n_points = ca_positions.shape[0]
# Pairwise Euclidean distances via NumPy broadcasting: a single vectorized
# expression replaces the original O(n^2) pure-Python double loop and yields
# the same symmetric (n_points, n_points) matrix.
pairwise_diffs = ca_positions[:, None, :] - ca_positions[None, :, :]
distance_matrix = np.sqrt(np.sum(pairwise_diffs ** 2, axis=-1))
# Convert to angstroms (mdtraj coordinates are in nm per the original *10 scaling)
distance_matrix *= 10
from Allohubpy import SAPLM
# The sequence should match the one used in the simulations
traj = md.load("data_NtrC/NtrC_active_nowat.pdb")
# Create a subset trajectory containing only the protein
protein_traj = traj.atom_slice(traj.topology.select("protein"))
ntrc_sequence = ''.join(str(residue.code) for residue in protein_traj.topology.residues)
# Create the PLM handler. The fragment size for alphabet M32k25 is 4
esm_handler = SAPLM.SAPLM(fragment_size=4)
esm_handler.set_sequence(ntrc_sequence)
# Extract likelihoods for the top most appearing fragments, in the same order
# as their frequency ranking above
for top_fragment in (64, 63, 90, 91, 89, 109, 97, 92, 88, 93):
    likelihood_df = esm_handler.fragment_likelihoods(fragment=top_fragment, offset=0)
    print(likelihood_df)
Amino acid position Amino acid Likelihood 0 65 L 0.997173 1 66 L 0.994718 2 67 K 0.739040 3 68 Q 0.889384 Amino acid position Amino acid Likelihood 0 64 A 0.887619 1 65 L 0.997173 2 66 L 0.994718 3 67 K 0.739040 Amino acid position Amino acid Likelihood 0 91 V 0.699971 1 92 S 0.796207 2 93 A 0.990187 3 94 Y 0.023101 Amino acid position Amino acid Likelihood 0 92 S 0.796207 1 93 A 0.990187 2 94 Y 0.023101 3 95 Q 0.803867 Amino acid position Amino acid Likelihood 0 90 A 0.992293 1 91 V 0.699971 2 92 S 0.796207 3 93 A 0.990187 Amino acid position Amino acid Likelihood 0 110 E 0.669687 1 111 A 0.648538 2 112 V 0.874420 3 113 A 0.853307 Amino acid position Amino acid Likelihood 0 98 A 0.997481 1 99 F 0.970710 2 100 D 0.952278 3 101 Y 0.927816 Amino acid position Amino acid Likelihood 0 93 A 0.990187 1 94 Y 0.023101 2 95 Q 0.803867 3 96 Q 0.791222 Amino acid position Amino acid Likelihood 0 89 A 0.742131 1 90 A 0.992293 2 91 V 0.699971 3 92 S 0.796207 Amino acid position Amino acid Likelihood 0 94 Y 0.023101 1 95 Q 0.803867 2 96 Q 0.791222 3 97 G 0.996706
Finally, we can examine possible communication paths between the phosphorylation site D54, which induces the shift to the active conformation of NtrC, and positions found in the signaling surface that were picked up as significant in the previous steps (88-100).
# Reload the module to pick up changes made during the session.
# Fix: importlib was never imported (not in the file's import block), so the
# reload call would raise NameError without this import.
import importlib
importlib.reload(SANetwork)
# We are interested in the signal transmission from D54
SAgraph = SANetwork.SANetWork(traj_list=mi_ntrc_active_100_1 + mi_ntrc_active_100_2 + mi_ntrc_active_100_3, distance_limit=7)
# Load the distances. The fragment size for M32k25 is 4.
SAgraph.set_distance_matrix(matrix=distance_matrix, fragment_size=4)
# The threshold filters coupled pairs with low significance
SAgraph.create_graph(threshold=50)
# list of fragments of interest
start = [54, 55, 56]
end = [i for i in range(88, 101)]
# subgraph is a networkx object with the nodes and edges of the shortest paths connecting those residues
# shortest_paths and shortest_distances are lists of the shortest paths and their distances respectively
# z_score provides an estimate of how statistically coupled the two sites are
subgraph, shortest_paths, shortest_distances, z_score = SAgraph.identify_preferential_connections(start_fragments=start, end_fragments=end)
# Create representation of the graph
Allohub_plots.plot_SA_graph(subgraph, start, end, action="show")